Goal of the script

This script plots sensor data in order to visualize the measurements recorded throughout the tool function experiment. Variables of interest are:

* Penetration depth

# Directory holding the input data (.xlsx), relative to the project directory
dir_in <- "analysis_ST/derived_data/"
# Directory the PDF plots are written to, relative to the project directory
dir_out <- "analysis_ST/plots"
# Create the output directory if it is missing; otherwise the ggsave() calls
# further down fail because they cannot open a file in a non-existent folder
dir.create(dir_out, showWarnings = FALSE, recursive = TRUE)

Input data must be located in ~/analysis_ST/derived_data/.
Plots will be saved as PDF files in ~/analysis_ST/plots. The knit directory for this script is the project directory.


Load packages

pack_to_load <- c("tidyverse", "R.utils", "openxlsx", "tools", 
                  "patchwork", "doBy", "ggrepel", "ggplot2")
sapply(pack_to_load, library, character.only = TRUE, logical.return = TRUE)
Warning: package 'tibble' was built under R version 4.1.3
Warning: package 'tidyr' was built under R version 4.1.3
Warning: package 'readr' was built under R version 4.1.3
Warning: package 'dplyr' was built under R version 4.1.3
Warning: package 'R.utils' was built under R version 4.1.3
Warning: package 'openxlsx' was built under R version 4.1.3
Warning: package 'doBy' was built under R version 4.1.3
tidyverse   R.utils  openxlsx     tools patchwork      doBy   ggrepel   ggplot2 
     TRUE      TRUE      TRUE      TRUE      TRUE      TRUE      TRUE      TRUE 

Get name, path and information of the file

# Locate the input workbook: every file in dir_in ending in ".xlsx" matches
data_file <- list.files(dir_in, pattern = "\\.xlsx$", full.names = TRUE)
# The rest of the script passes data_file to read.xlsx() and builds the output
# file names from it, so it only works with exactly one input file. Stop early
# with a clear message instead of failing later in a less obvious place.
if (length(data_file) != 1L) {
  stop("Expected exactly one .xlsx file in '", dir_in, "', found ",
       length(data_file), ".", call. = FALSE)
}
# MD5 checksum of the input file, stored together with its base name so the
# exact file used for the plots can be identified
md5_in <- md5sum(data_file)
info_in <- data.frame(file = basename(names(md5_in)), checksum = md5_in, row.names = NULL)

Load data into R object

# Import the workbook into a data frame and print its structure
imp_data <- read.xlsx(xlsxFile = data_file)
str(object = imp_data)
'data.frame':   479933 obs. of  11 variables:
 $ Sample      : chr  "FLT8-7" "FLT8-7" "FLT8-7" "FLT8-7" ...
 $ Angle       : chr  "35°" "35°" "35°" "35°" ...
 $ Task        : chr  "carving" "carving" "carving" "carving" ...
 $ Raw_material: chr  "Flint" "Flint" "Flint" "Flint" ...
 $ Stroke      : num  1 1 1 1 1 1 1 1 1 1 ...
 $ Step        : num  1 2 3 4 5 6 7 8 9 10 ...
 $ Force       : num  -58.9 -59.2 -62.4 -59.6 -59.2 ...
 $ Friction    : num  -9.06 -8.84 -31.52 -28.64 -27.95 ...
 $ Depth       : num  5.57 5.59 5.72 5.75 5.58 ...
 $ Position    : num  65 66.4 97 155.4 211.3 ...
 $ Velocity    : num  -1.72e-04 6.87e+01 4.61e+02 5.94e+02 4.65e+02 ...

The imported file is: “~/analysis_ST/derived_data/TFE_inotec.xlsx”

Plot each of the selected numeric variables

Plots showing the strokes as lines

# Drop the rows of the strokes with wrong/missing values:
#   FLT8-3   -> strokes 1032 - 2000
#   FLT8-4   -> strokes 243, 244 and 1921
#   LYDIT5-7 -> strokes 1997 - 2000
# NOTE(review): the rows are addressed by hard-coded position, so these
# indices are only valid for this exact input file and row order -- confirm
# them whenever the input data changes.
good_data <- imp_data[
  -c(
    242382:242401,
    259107:259116,
    359872:359911,
    410218:419907
  ),
]

# good_data <- imp_data


# Plot the strokes of each sample, thinned out to every 40th stroke
# Split the data by sample ID: one data frame per sample (24 samples)
sp <- split(good_data, good_data[["Sample"]])


# For every sample, draw the depth profiles of the first 50 strokes (left
# panel) next to a thinned-out selection of all strokes (right panel), print
# the combined figure and save it as PDF in dir_out.
for (i in seq_along(sp)) {
  dat_i <- sp[[i]]
  strokes_i <- unique(dat_i[["Stroke"]])

  # select every 40th stroke and always include the last recorded stroke
  seq_st <- c(seq(1, length(strokes_i), by = 40), max(strokes_i))
  dat_i_all <- dat_i %>%
    filter(Stroke %in% seq_st)

  # take only the first 50 strokes per sample
  dat_i_50 <- dat_i %>%
    filter(Stroke %in% 1:50)

  # Both panels share one y-range so they can be compared directly. The range
  # must cover the data of BOTH panels: taking it from the thinned-out strokes
  # only would clip any of the first 50 strokes that lie outside of it.
  range_depth <- range(dat_i_all[["Depth"]], dat_i_50[["Depth"]],
                       na.rm = TRUE)

  # right panel: thinned-out selection of all strokes; the y-axis title is
  # dropped because the left panel already carries it
  p1 <- ggplot(data = dat_i_all, aes(x = Step, y = Depth, colour = Stroke)) +
    geom_line(aes(group = Stroke), alpha = 0.3) +
    labs(x = "Step", y = NULL) +
    # reverse the legend starting with 0 going to 2000 strokes
    scale_colour_continuous(trans = "reverse") +
    coord_cartesian(ylim = range_depth) +
    # change the 'Step-number' in the x-legend
    scale_x_continuous(breaks = c(1, 4, 7, 10)) +
    theme_classic()

  # left panel: only the first 50 strokes
  p2 <- ggplot(data = dat_i_50) +
    geom_line(aes(x = Step, y = Depth, colour = Stroke, group = Stroke),
              alpha = 0.3) +
    labs(x = "Step", y = "Depth (mm)") +
    scale_colour_continuous(trans = "reverse") +
    coord_cartesian(ylim = range_depth) +
    scale_x_continuous(breaks = c(1, 4, 7, 10)) +
    theme_classic()

  # patchwork plot, titled with the sample ID
  p <- p2 + p1 + plot_annotation(title = names(sp)[i])
  print(p)

  # save to PDF: <input file name>_plot_<sample ID>.pdf
  file_out <- paste0(file_path_sans_ext(info_in[["file"]]), "_plot_",
                     names(sp)[i], ".pdf")
  ggsave(filename = file_out, plot = p, path = dir_out, device = "pdf")
}

Plots showing the relative penetration depths

Plot of all samples

# calculate the relative depths reached per sample
# Relative depth reached within a series of depth measurements.
#
# x: numeric vector of depth values in recording order; NA values are ignored.
# Returns the minimum value minus the first non-missing value (zero or
# negative), or NA if x holds no non-missing value at all.
rel.depth <- function(x) {
  no.na <- x[!is.na(x)]
  # nothing to compare if every value is missing
  if (length(no.na) == 0) {
    return(NA_real_)
  }
  # The reference is the first NON-missing value: using x[1] would turn the
  # whole result into NA whenever the very first measurement is missing, even
  # though missing values are deliberately removed for the minimum.
  min(no.na) - no.na[1]
}

# Relative depth per sample: group the cleaned data by sample ID and its
# experimental settings and apply rel.depth() to the Depth values of each group
depth <- summaryBy(
  Depth ~ Sample + Angle + Task + Raw_material,
  data = good_data,
  FUN = rel.depth
)

str(depth)
'data.frame':   24 obs. of  5 variables:
 $ Sample         : chr  "FLT8-1" "FLT8-10" "FLT8-11" "FLT8-12" ...
 $ Angle          : chr  "45°" "45°" "45°" "45°" ...
 $ Task           : chr  "cutting" "carving" "carving" "carving" ...
 $ Raw_material   : chr  "Flint" "Flint" "Flint" "Flint" ...
 $ Depth.rel.depth: num  -0.284 -1.881 -0.142 -0.111 -0.313 ...
# Colours: one colour per raw material, in the order of the factor levels
depth[["Raw_material"]] <- factor(depth[["Raw_material"]])
custom.col7 <- data.frame(
  type = levels(depth[["Raw_material"]]),
  col = c("#899DA4", "#DC863B")
)
# row lookup by factor: each sample gets the colour of its raw material
depth[["col"]] <- custom.col7[depth[["Raw_material"]], "col"]


# One point per sample, faceted by task, with the relative depth on a
# reversed y-axis and the angle on an x-axis drawn at the top
p3 <- ggplot(
  data = depth,
  mapping = aes(x = Angle, y = Depth.rel.depth, colour = Raw_material)
) +
  geom_point() +
  labs(y = "Relative depth (mm)") +
  facet_wrap(~Task, strip.position = "bottom") +
  # label the points with the sample IDs without overplotting
  geom_text_repel(
    mapping = aes(label = Sample),
    size = 2,
    nudge_x = -0.4,
    segment.size = 0.1,
    force = 2,
    seed = 123
  ) +
  scale_y_continuous(trans = "reverse") +
  scale_x_discrete(position = "top") +
  # legend title without the underscore of the column name
  labs(colour = gsub("_", " ", "Raw_material")) +
  scale_colour_manual(values = custom.col7$col) +
  theme_classic()

print(p3)

# Write the figure to dir_out as <input file name>_P3_depth_plot.pdf
file_out <- paste0(
  file_path_sans_ext(info_in[["file"]]),
  "_P3_depth_plot",
  ".pdf"
)
ggsave(
  filename = file_out,
  plot = p3,
  path = dir_out,
  device = "pdf",
  width = 180,
  units = "mm"
)
# calculate the absolute depths reached per sample
# Absolute depth range covered by a series of depth measurements.
#
# x: numeric vector of depth values; NA values are ignored, in line with
#    rel.depth().
# Returns the difference between the largest and the smallest value (always
# >= 0), or NA if x holds no non-missing value at all.
abs.depth <- function(x) {
  no.na <- x[!is.na(x)]
  # nothing to measure if every value is missing
  if (length(no.na) == 0) {
    return(NA_real_)
  }
  # max - min equals abs(min - max); returned visibly rather than as the
  # invisible value of an assignment
  max(no.na) - min(no.na)
}

# Absolute depth per sample: group by sample ID and its experimental settings
# and apply abs.depth() to the Depth values of each group.
# NOTE(review): this summary is computed on imp_data, i.e. including the
# strokes that were excluded from good_data -- confirm that this is intended.
depth <- summaryBy(
  Depth ~ Sample + Angle + Task + Raw_material,
  data = imp_data,
  FUN = abs.depth
)

str(depth)
'data.frame':   24 obs. of  5 variables:
 $ Sample         : chr  "FLT8-1" "FLT8-10" "FLT8-11" "FLT8-12" ...
 $ Angle          : chr  "45°" "45°" "45°" "45°" ...
 $ Task           : chr  "cutting" "carving" "carving" "carving" ...
 $ Raw_material   : chr  "Flint" "Flint" "Flint" "Flint" ...
 $ Depth.abs.depth: num  0.687 11.923 0.822 1.685 0.69 ...
# Colours: one colour per raw material, in the order of the factor levels
depth[["Raw_material"]] <- factor(depth[["Raw_material"]])
custom.col7 <- data.frame(
  type = levels(depth[["Raw_material"]]),
  col = c("#899DA4", "#DC863B")
)
# row lookup by factor: each sample gets the colour of its raw material
depth[["col"]] <- custom.col7[depth[["Raw_material"]], "col"]


# One point per sample, faceted by task, with the absolute depth on a
# reversed y-axis and the angle on an x-axis drawn at the top
p3b <- ggplot(
  data = depth,
  mapping = aes(x = Angle, y = Depth.abs.depth, colour = Raw_material)
) +
  geom_point() +
  labs(y = "Absolute depth (mm)") +
  facet_wrap(~Task, strip.position = "bottom") +
  # label the points with the sample IDs without overplotting
  geom_text_repel(
    mapping = aes(label = Sample),
    size = 2,
    nudge_x = -0.4,
    segment.size = 0.1,
    force = 2,
    seed = 123
  ) +
  scale_y_continuous(trans = "reverse") +
  scale_x_discrete(position = "top") +
  # legend title without the underscore of the column name
  labs(colour = gsub("_", " ", "Raw_material")) +
  scale_colour_manual(values = custom.col7$col) +
  theme_classic()

print(p3b)

# Write the figure to dir_out as <input file name>_P3b_depth_plot.pdf
file_out <- paste0(
  file_path_sans_ext(info_in[["file"]]),
  "_P3b_depth_plot",
  ".pdf"
)
ggsave(
  filename = file_out,
  plot = p3b,
  path = dir_out,
  device = "pdf",
  width = 180,
  units = "mm"
)

Plot of all samples except the outlier (FLT8-10)

# FLT8-10 is treated as an outlier here, because the result of this sample is
# not comparable to the other samples
bad_sample <- "FLT8-10"
# keep only the rows that do not belong to the outlier
good_data_outlier <- good_data[!(good_data[["Sample"]] %in% bad_sample), ]
# one data frame per remaining sample
sp_good <- split(good_data_outlier, good_data_outlier[["Sample"]])


# Relative depth per remaining sample: group by sample ID and its experimental
# settings and apply rel.depth() to the Depth values of each group
depth_good <- summaryBy(
  Depth ~ Sample + Angle + Task + Raw_material,
  data = good_data_outlier,
  FUN = rel.depth
)

str(depth_good)
'data.frame':   23 obs. of  5 variables:
 $ Sample         : chr  "FLT8-1" "FLT8-11" "FLT8-12" "FLT8-2" ...
 $ Angle          : chr  "45°" "45°" "45°" "45°" ...
 $ Task           : chr  "cutting" "carving" "carving" "cutting" ...
 $ Raw_material   : chr  "Flint" "Flint" "Flint" "Flint" ...
 $ Depth.rel.depth: num  -0.284 -0.142 -0.111 -0.313 -0.333 ...
# One point per remaining sample, faceted by task, with the relative depth on
# a reversed y-axis and the angle on an x-axis drawn at the top
p4 <- ggplot(
  data = depth_good,
  mapping = aes(x = Angle, y = Depth.rel.depth, colour = Raw_material)
) +
  geom_point() +
  labs(y = "Relative depth (mm)") +
  facet_wrap(~Task, strip.position = "bottom") +
  # label the points with the sample IDs without overplotting
  geom_text_repel(
    mapping = aes(label = Sample),
    size = 2,
    nudge_x = -0.4,
    segment.size = 0.1,
    force = 2,
    seed = 123
  ) +
  scale_y_continuous(trans = "reverse") +
  scale_x_discrete(position = "top") +
  # legend title without the underscore of the column name
  labs(colour = gsub("_", " ", "Raw_material")) +
  # colours come from the custom.col7 table defined earlier in the script
  scale_colour_manual(values = custom.col7$col) +
  theme_classic()

print(p4)

# Write the figure to dir_out as <input file name>_P4_depth_plot.pdf
file_out <- paste0(
  file_path_sans_ext(info_in[["file"]]),
  "_P4_depth_plot",
  ".pdf"
)
ggsave(
  filename = file_out,
  plot = p4,
  path = dir_out,
  device = "pdf",
  width = 180,
  units = "mm"
)

The files will be saved as PDF files in “~/analysis_ST/plots/”.


sessionInfo() and RStudio version

# Print the R version, platform, locale and package versions of this session
sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19043)

Matrix products: default

locale:
[1] LC_COLLATE=German_Germany.1252  LC_CTYPE=German_Germany.1252   
[3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C                   
[5] LC_TIME=German_Germany.1252    

attached base packages:
[1] tools     stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] ggrepel_0.9.1     doBy_4.6.13       patchwork_1.1.1   openxlsx_4.2.5   
 [5] R.utils_2.11.0    R.oo_1.24.0       R.methodsS3_1.8.1 forcats_0.5.1    
 [9] stringr_1.4.0     dplyr_1.0.9       purrr_0.3.4       readr_2.1.2      
[13] tidyr_1.2.0       tibble_3.1.6      ggplot2_3.3.6     tidyverse_1.3.1  

loaded via a namespace (and not attached):
 [1] httr_1.4.2           sass_0.4.1           jsonlite_1.8.0      
 [4] modelr_0.1.8         bslib_0.3.1          microbenchmark_1.4.9
 [7] assertthat_0.2.1     highr_0.9            cellranger_1.1.0    
[10] yaml_2.3.5           pillar_1.7.0         backports_1.4.1     
[13] lattice_0.20-44      glue_1.6.2           digest_0.6.29       
[16] rvest_1.0.2          colorspace_2.0-3     htmltools_0.5.2     
[19] Matrix_1.3-3         pkgconfig_2.0.3      broom_0.8.0         
[22] haven_2.5.0          scales_1.2.0         tzdb_0.3.0          
[25] generics_0.1.2       farver_2.1.0         ellipsis_0.3.2      
[28] withr_2.5.0          cli_3.3.0            magrittr_2.0.3      
[31] crayon_1.5.1         readxl_1.4.0         evaluate_0.15       
[34] fs_1.5.2             fansi_1.0.3          MASS_7.3-54         
[37] xml2_1.3.3           hms_1.1.1            lifecycle_1.0.1     
[40] munsell_0.5.0        reprex_2.0.1         zip_2.2.0           
[43] compiler_4.1.0       Deriv_4.1.3          jquerylib_0.1.4     
[46] rlang_1.0.2          grid_4.1.0           rstudioapi_0.13     
[49] labeling_0.4.2       rmarkdown_2.14       gtable_0.3.0        
[52] DBI_1.1.2            R6_2.5.1             lubridate_1.8.0     
[55] knitr_1.39           fastmap_1.1.0        utf8_1.2.2          
[58] stringi_1.7.6        Rcpp_1.0.8.3         vctrs_0.4.1         
[61] dbplyr_2.1.1         tidyselect_1.1.2     xfun_0.30           

RStudio version 1.4.1717.

Cite R packages used

# Print the plain-text citation (without the BibTeX entry) of every package
# loaded at the top of the script
for (pkg in pack_to_load) {
  pkg_citation <- citation(pkg)
  print(pkg_citation, bibtex = FALSE)
}

  Wickham et al., (2019). Welcome to the tidyverse. Journal of Open
  Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686


To cite package 'R.utils' in publications use:

  Henrik Bengtsson (2021). R.utils: Various Programming Utilities. R
  package version 2.11.0. https://CRAN.R-project.org/package=R.utils


To cite package 'openxlsx' in publications use:

  Philipp Schauberger and Alexander Walker (2021). openxlsx: Read,
  Write and Edit xlsx Files. R package version 4.2.5.
  https://CRAN.R-project.org/package=openxlsx


The 'tools' package is part of R.  To cite R in publications use:

  R Core Team (2021). R: A language and environment for statistical
  computing. R Foundation for Statistical Computing, Vienna, Austria.
  URL https://www.R-project.org/.

We have invested a lot of time and effort in creating R, please cite it
when using it for data analysis. See also 'citation("pkgname")' for
citing R packages.


To cite package 'patchwork' in publications use:

  Thomas Lin Pedersen (2020). patchwork: The Composer of Plots. R
  package version 1.1.1. https://CRAN.R-project.org/package=patchwork


To cite package 'doBy' in publications use:

  Søren Højsgaard and Ulrich Halekoh (2022). doBy: Groupwise
  Statistics, LSmeans, Linear Estimates, Utilities. R package version
  4.6.13. https://CRAN.R-project.org/package=doBy

ATTENTION: This citation information has been auto-generated from the
package DESCRIPTION file and may need manual editing, see
'help("citation")'.


To cite package 'ggrepel' in publications use:

  Kamil Slowikowski (2021). ggrepel: Automatically Position
  Non-Overlapping Text Labels with 'ggplot2'. R package version 0.9.1.
  https://CRAN.R-project.org/package=ggrepel


To cite ggplot2 in publications, please use:

  H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
  Springer-Verlag New York, 2016.

END OF SCRIPT